home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
FishMarket 1.0
/
FishMarket v1.0.iso
/
fishies
/
001-025
/
disk_006
/
mult
/
mult.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-05-06
|
10KB
|
425 lines
/*
* mult.c
* dennis bednar 08 08 85 Original creation.
* dennis bednar 01 09 86 Added -F flag, added debug flag.
* report bugs/suggestions etc. to dennis@rlgvax.uucp
*
* mult reads the input (stdin or file(s)), comparing adjacent lines.
* In the normal case, the second, and succeeding copies of repeated
* lines are output to stdout.
* Note that repeated lines must be adjacent, see sort(1).
* This tool is sort of the opposite of uniq.
*
* -fn = use field number n in each line for the comparison, n = 1 = first.
* Note - in the 2 lines " abc def" and "abc def", "abc" is field # 1,
* and "def" is field number 2, multiple white space chars are field separators.
*
* -a = output 1st of multiple occurrences
* Note - this flag is very useful in conjunction with -fn flag.
* Example: trying to find all include files which are in multiple dirs:
* with input sorted by 1st column:
stdio.h /usr/include
stdio.h /tmp/junk
* we would use both "-f1 -a" flags to print only those lines in which
* include files were in more than one directory, but not outputting
* those lines in which include files were in only one directory.
*
*/
#include <errno.h>
#include <stdio.h>
#include <string.h>
/* forward references to functions defined further down */
extern char *u_errmesg();
char *find_field();

/* program name (argv[0]), used as the prefix of error messages */
char *cmd;

/* option flags filled in by main() while it scans the arguments */
int aflag;		/* set to 1 by -a */
int dflag;		/* incremented by each -d, non-zero = debug output */
int fflag;		/* set to 1 by -fn */
int fieldnum;		/* the n of -fn, meaningful only when fflag == 1 */

/* field separator from -Fc; '\0' means fields are split on white space */
char Fflag = '\0';
/*
 * main - parse the command line options, then run mult() over stdin
 * or over each named file in turn.
 *
 * Options must come before the first file name:
 *	-a	also output the 1st line of each run of repeated lines
 *	-d	turn on debug output
 *	-fn	compare field number n (1 = first) instead of the whole line
 *	-Fc	use character c as the field separator
 *
 * An unknown or malformed option prints the usage text and exits with 1.
 * A file that cannot be opened is reported on stderr and skipped.
 *
 * Returns 0 when every input was opened, 1 when at least one file
 * could not be opened.
 */
int
main(argc, argv)
int argc;
char **argv;
{
	int i;
	int status = 0;		/* becomes 1 if any file fails to open */
	FILE *infp;

	cmd = argv[0];

	/* loop thru args, stopping at end of args or first file name */
	for (i = 1; i < argc; ++i)
	{
		if (argv[i][0] != '-')
			break;		/* found first non-option, ie 1st filename */

		if (strcmp(argv[i], "-a") == 0)
		{
			aflag=1;
			continue;
		}

		/* get debug flag */
		if (strcmp(argv[i], "-d") == 0)
		{
			++dflag;	/* enable debugging */
			printf("Debugging on\n");
			continue;	/* goto next argument */
		}

		/* get field number */
		if (strncmp(argv[i], "-f", 2) == 0)
		{
			if (fflag)
				goto usage;	/* only one -fn allowed */
			fflag = 1;
			if (argv[i][2] == '\0')
				goto usage;
			fieldnum = atoi(argv[i]+2);
			if (fieldnum <= 0)
			{
				fprintf(stderr, "%s: 'field number' must be positive\n", cmd);
				goto usage;
			}
			continue;
		}

		/* get field separator character */
		if (strncmp(argv[i], "-F", 2) == 0)
		{
			if (Fflag)
				goto usage;	/* only one -Fc allowed */
			Fflag = argv[i][2];	/* save field separator char */
			if (argv[i][2] == '\0')
				goto usage;	/* no field separator */
			continue;
		}

		/* anything else starting with '-' is not a known option */
usage:
		fprintf(stderr, "usage: %s [-a] [-d] [-fn] [-Fc] [file ...]\n", cmd);
		fprintf(stderr, " outputs 2nd, 3rd, ... of multiple lines\n");
		fprintf(stderr, " -a = also output 1st one of multiple lines\n");
		fprintf(stderr, " -d = debug\n");
		fprintf(stderr, " -fn = use field number n to compare instead of line, 1=1st field,\n");
		fprintf(stderr, " with white space as field separator\n");
		fprintf(stderr, " -Fc = means use character 'c' as the field separator\n");
		exit(1);
	}

	if (i == argc)			/* no file names given */
		mult(stdin);		/* so read from stdin */
	else
		for ( ;i < argc; ++i)	/* use given file names */
		{
			infp = fopen( argv[i], "r");
			if (infp == (FILE *)NULL)
			{
				fprintf(stderr, "%s: cant open %s: %s\n", cmd, argv[i], u_errmesg());
				status = 1;	/* remember the failure for the exit status */
				continue;
			}
			mult( infp );
			fclose(infp);
		}

	/* give the caller a defined exit status instead of falling off the end */
	return status;
}
/* longest line, including the newline and the '\0', that fits in a buffer */
#define LINESIZE 2048

/* one buffered input line */
struct t_line
{
	char linebuf [ LINESIZE ];
};

/* the two most recently read lines; which is which is given by old/new */
struct t_line line [2];

/* the lines are exchanged by swapping these indexes, not by copying text */
int old = 0;			/* index of old line */
int new = 1;			/* index of new line */

/* address of the first character in each line buffer */
#define OLDLINE line[old].linebuf
#define NEWLINE line[new].linebuf

/* states used to decide what to print when the next line arrives */
#define S_START 0		/* nothing read yet */
#define S_UNIQLINE 1		/* saw 1st line or new one different than the old */
#define S_MULTLINE 2		/* saw new line which is same as the old one */

int state = S_START;
/*
 * mult - read lines from infp until end of file and print the repeated
 * ones on stdout.
 *
 * Each line is compared with the line read just before it, either as a
 * whole line or, when fflag is set, on field number fieldnum only.
 * The 2nd and later lines of a run of equal lines are printed; when
 * aflag is set the 1st line of the run is printed as well.
 *
 * The previous line and the state live in file-scope variables, so they
 * carry over from one call to the next.
 *
 * Always returns 0.
 */
int
mult( infp )
FILE *infp;
{
	int isdiff;	/* non-zero iff old line != new line */

	/* keep reading lines until eof; eof causes no state transition */
	while (fgets(NEWLINE, LINESIZE, infp) != NULL)
	{
		/* A last line that simply lacks its '\n' is legal input.
		 * stripnl() would report it as "too long" and exit, so only
		 * call it when a newline is there to remove or when fgets
		 * stopped for some reason other than end of file.
		 */
		if (strchr(NEWLINE, '\n') != NULL || !feof(infp))
			stripnl(NEWLINE);	/* remove ending newline from string */

		/* first time mult() is called, we must save the 1st line
		 * read as the 'oldline' for comparing against future 'newline's
		 */
		if (state == S_START)
		{
			swapline();	/* new line becomes the old line */
			state = S_UNIQLINE;
			continue;	/* get next line */
		}

		/* compare the old vs new line once, it is needed in both states */
#define DIFF strcmp
		if (fflag)	/* compare by field ? */
			/* yes, pass the global fieldnum so that same_field()
			 * is kept modular, and reusable in other applications
			 */
			isdiff = !same_field(OLDLINE, NEWLINE, fieldnum);
		else		/* no compare entire line */
			isdiff = (DIFF(OLDLINE, NEWLINE));

		if (isdiff)
		{
			/* a different line always starts a fresh run */
			state = S_UNIQLINE;
		}
		else
		{
			/* 1st repeat of a run: with -a show the original too */
			if (state == S_UNIQLINE && aflag)
				printf("%s\n", OLDLINE);
			printf("%s\n", NEWLINE);
			state = S_MULTLINE;
		}

		/* in every case the line just read becomes the old line */
		swapline();
	}
	return 0;
}
/*
 * Exchange the roles of the two line buffers.
 * Only the indexes 'old' and 'new' are swapped, no text is moved.
 * Called after a read into the new line, so the line just read becomes
 * the old line and the former old line's buffer is reused for the next read.
 */
swapline()
{
	register int was_old = old;	/* remember it before it is overwritten */

	old = new;
	new = was_old;
}
/*
 * Compare field number 'fieldnum' (1 = first) of oldline and newline.
 * Returns 1 if both lines have that field and the fields are equal,
 * 0 if they differ or if either line has no such field.
 * When dflag is set, the fields and lines being compared are printed.
 */
same_field(oldline, newline, fieldnum)
char *oldline,
*newline;
int fieldnum;
{
	char *oldfld;	/* walks the field in the old line */
	char *newfld;	/* walks the field in the new line */
	char *p;	/* used only to print a field when debugging */

	oldfld = find_field(oldline, fieldnum);
	if (dflag)
	{
		/* dump out the old field being compared */
		printf("Old field %d = <", fieldnum);
		if (*oldfld != '\0')
		{
			p = oldfld;
			while (*p && !field_dlm(*p))
			{
				printf("%c", *p);
				++p;
			}
		}
		else	/* past last field in line */
			printf("UNDEF");
		printf("> ");
		printf("Old line = <%s>\n", oldline);
	}

	newfld = find_field(newline, fieldnum);
	if (dflag)
	{
		/* dump out the new field being compared */
		printf("New field %d = <", fieldnum);
		if (*newfld != '\0')
		{
			p = newfld;
			while (*p && !field_dlm(*p))
			{
				printf("%c", *p);
				++p;
			}
		}
		else	/* past last field in line */
			printf("UNDEF");
		printf("> ");
		printf("New line = <%s>\n", newline);
	}

	/* a field that does not exist never matches anything */
	if (*oldfld == '\0' || *newfld == '\0')
		return 0;

	/* walk both fields in step; field_dlm() is also true for '\0' */
	while (*oldfld || *newfld)
	{
		if (field_dlm(*oldfld) && field_dlm(*newfld))
			return 1;	/* both fields ended together */

		/* also catches the case where only one field has ended */
		if (*oldfld != *newfld)
			return 0;

		++oldfld;
		++newfld;
	}

	/* both strings hit EOS at the same time */
	return 1;
}
/*
 * Return 1 if c ends a field, else 0.
 * A null char always ends a field, because the null replaces the
 * newline after the line has been read in.  Otherwise the delimiter is
 * the character held in Fflag when one was given, or any of space, tab
 * and newline when Fflag is '\0'.
 */
field_dlm(c)
char c;
{
	if (c == '\0')		/* End of String is always a delimiter */
		return 1;

	if (!Fflag)		/* no separator given, so test for white space */
		return (c == ' ' || c == '\t' || c == '\n');

	return (c == Fflag);	/* must match the separator that was given */
}
/*
 * Return a pointer to the start of field number 'num' in 'line',
 * where 1 is the first field.
 * Returns a pointer to the terminating '\0' when num is less than 1,
 * or when the line ends before that field is reached.
 *
 * With white space separation (Fflag == '\0') leading blanks and tabs
 * are skipped before every field.  With a separator character given in
 * Fflag, the first character of the line starts field 1 and each
 * separator starts a new field.
 */
char *
find_field (line, num)
char *line;
int num;
{
	char *cp;	/* ptr to return */
	int left;	/* fields still to be skipped over */

#define iswhite(c) ( (((c) & 0xff) == '\t') || (((c) & 0xff) == ' ') )

	/* must ask for valid field number */
	if (num < 1)
		return (line+strlen(line));	/* '\0' */

	cp = line;
	for (left = num - 1; ; --left)
	{
		/* white space mode: step over blanks in front of the field */
		if (!Fflag)
		{
			while (*cp && iswhite(*cp))
				++cp;
		}

		/* cp is at the beginning of the wanted field, or at EOS */
		if (left <= 0)
			break;

		/* not there yet, so step over this field */
		if (Fflag)
		{
			while (*cp && !field_dlm(*cp))
				++cp;
			/* step past the delimiter, but never past EOS */
			if (*cp)
				++cp;
		}
		else
		{
			/* stops at EOS or at the next white space char */
			while (*cp && !iswhite(*cp))
				++cp;
		}
	}
	return cp;
}
/*
 * Strip the ending newline from a string returned by fgets.
 * If the last char is not a newline, the line is taken to be too long:
 * an error is printed on stderr and the program exits with status 1.
 *
 * An empty string has no last char at all; it is treated the same way
 * instead of looking at the byte in front of the buffer.
 *
 * Returns 0 when the newline was removed; does not return on error.
 */
int
stripnl(s)
char *s;
{
	size_t len;	/* length of s, counting the newline */

	len = strlen(s);

	/* test len first so that s[len - 1] is never s[-1] */
	if (len == 0 || s[len - 1] != '\n')
	{
		fprintf(stderr, "%s: error line <%s>... was too long\n", cmd, s);
		exit(1);
	}

	s[len - 1] = '\0';	/* remove the newline */
	return 0;
}
/*
 * Return the error message string for the current value of errno.
 * More flexibility than perror(3), because the caller decides where
 * and how the text is printed.
 *
 * The text comes from strerror(), so the caller must not modify it and
 * should use it before the next call that may change it.
 * Returns "unknown error" if strerror() gives back a null pointer.
 */
char *
u_errmesg()
{
	char *msg;

	/* strerror() is standard C, so this works without relying on
	 * the 'unix' macro or on the sys_errlist[] / sys_nerr tables.
	 */
	msg = strerror(errno);
	if (msg == NULL)
		return ("unknown error");
	return msg;
}